/****************************************************************************
	Date: Dec. 2, 2011, updated in July, 2013
	
	Program: export-90-08 called in 01_brncustoms
	
	This file:
	- uploads export data from $datapathCustoms (year-by-year)
	- harmonizes the format of variables
	- drops duplicated observations
	- sums export towards Belgium and Luxemburg (defined as one single destination up to 1998)
	- aggregates at the firm*destination level (across nc8 products)
	- appends annual file into a panel of exports
	
	Inputs: 
	- working paths (from 01_brncustoms)
	
	Output:
	- export90-08.dta
	- variables in export90-08.dta: siren year pays export (value)
	
****************************************************************************/

clear
set more off

* CHECH EXPORT DATA
forvalues i=1990(1)2008{
di `i'
use $datapathCustoms/newexports_`i', clear
tab year
format nc8  %8.0gc
tostring nc8, replace
gen length=length(nc8)
tab length
replace nc8="0"+nc8 if length==7
drop length
format siren %9.0gc
tostring siren, replace
gen length=length(siren)
tab length
replace siren="0"+siren if length==8
replace siren="00"+siren if length==7
replace siren="000"+siren if length==6
replace siren="0000"+siren if length==5
replace siren="00000"+siren if length==4
replace siren="000000"+siren if length==3
replace siren="0000000"+siren if length==2
replace siren="00000000"+siren if length==1
drop length
g length=length(reso)
tab length
drop length
* pb with reso when exporting in sas + address is missing
* track duplicates
duplicates tag nc8 siren pays natr temo vart oblig dept reso paystrans pypr quan usup oblig, g(tag)
tab tag
drop tag
* no duplicates
duplicates tag nc8 siren pays natr temo vart oblig reso paystrans pypr quan usup oblig, g(tag)
tab tag
* several observations with different dept => keep only one
bysort nc8 siren pays natr temo vart oblig reso paystrans pypr quan usup oblig: keep if _n==1
drop tag
duplicates tag nc8 siren pays vart, g(tag)
tab tag
drop tag
save $datapath/newexports_`i', replace
tab pypr
tab pays if pypr=="BELGIQUE"
tab pays if pypr=="LUXEMBOURG"
tab pays if pypr=="BELGIQUE ET LUXEMBOURG"
* BE and LU after 1998
* XU until 1998
}

cd $datapath
forvalues x=1990(1)2008{
use newexports_`x', clear
duplicates drop nc8 siren pays vart, force
keep nc8 siren pays vart pypr quan usup oblig year
rename vart export 
rename quan qty

drop if siren=="000000000"
drop if siren=="999999999"

if `x'>1998{

*On règle le pb Bel/Lux en ne faisant de ces 2 pays qu'un seul
replace pays="BE" if pays=="LU"
bysort siren nc8 pays: egen var=sum(export)
bysort siren nc8 pays: egen var2=sum(qty)
bysort siren nc8 pays: egen var3=sum(usup)
bysort siren nc8 pays: keep if _n==1
replace export=var if export!=var
replace qty=var2 if qty!=var2
replace usup=var2 if usup!=var3
drop var var2
replace pypr="BELGIQUE ET LUXEMBOURG" if pays=="BE"
}
else{
replace pays="BE" if pays=="XU"     // On affecte le commerce UEBL à la Belgique pour 1998
}
replace pays="DE" if pays=="DD" // RDA
replace pays="RU" if pays=="SU" // URSS

drop if pays=="XH"|pays=="3"|pays=="nu"|substr(pays,1,1)=="X"|pays=="FA"|pays=="QQ"|pays=="QU"|pays=="QV"|pays=="QW"


label var siren "SIREN firm code"
label var year "Year"
label var export "Export value"
label var qty "Export quantity"
label var nc8 "Product - nc8 classification"
label var oblig "Niveau d'obligation"
order  siren year nc8 export qty usup oblig
save export`x', replace
erase newexports_`x'.dta
}



* Aggregate at the firm level (across products) and at the firm * product level 
* correspondance across hs classifications and with ISIC rev3
clear
insheet using $datapathother/HS96_HS02.txt, names
tostring hs02, replace
replace hs02="0"+hs02 if length(hs02)==5
tostring hs96, replace
replace hs96="0"+hs96 if length(hs96)==5
keep hs02 hs96
sort hs02
save $datapath/temp, replace
clear
insheet using $datapathother/hs2002ToIsicrev3.csv, names delimiter(";")
tostring productcode, g(hs02)
replace hs02="0"+hs02 if length(hs02)==5
tostring isicrevision3productcode, g(isic)
replace isic="0"+isic if length(isic)==3
g _=substr(isic,1,4)
g isicrev3=substr(isic,1,2)
replace isicrev3=_ if _=="2423"
keep hs02 isicrev3
sort hs02
save $datapath/temp_hs02toisic, replace
merge 1:m hs02 using $datapath/temp
drop _merge
sort hs96 isicrev3
by hs96 isicrev3: keep if _n==1
duplicates tag hs96, g(_)
* 36 hs1996 sectors have ambiguous isic correspondance
sort hs96 isicrev3
by hs96: keep if _n==1
drop _
keep hs96 isicrev3
sort hs96
save $datapath/temp_hs96toisic, replace
clear
insheet using $datapathother/HS07_HS02.txt, names
tostring hs07, replace
replace hs07="0"+hs07 if length(hs07)==5
tostring hs02, replace
replace hs02="0"+hs02 if length(hs02)==5
keep hs02 hs07
sort hs02
save $datapath/temp, replace
clear
insheet using $datapathother/hs2002ToIsicrev3.csv, names delimiter(";")
tostring productcode, g(hs02)
replace hs02="0"+hs02 if length(hs02)==5
tostring isicrevision3productcode, g(isic)
replace isic="0"+isic if length(isic)==3
g _=substr(isic,1,4)
g isicrev3=substr(isic,1,2)
replace isicrev3=_ if _=="2423"
keep hs02 isicrev3
sort hs02
merge 1:m hs02 using $datapath/temp
keep if _merge==3
drop _merge
sort hs07 isicrev3
by hs07 isicrev3: keep if _n==1
duplicates tag hs07, g(_)
* 54 hs2002 sectors have ambiguous isic correspondance
sort hs07 isicrev3
by hs07: keep if _n==1
keep hs07 isicrev3
sort hs07
save $datapath/temp_hs07toisic, replace
clear
insheet using $datapathother/HS96ToHS92.txt, names
tostring hs96, replace
replace hs96="0"+hs96 if length(hs96)==5
tostring hs1988, replace
replace hs1988="0"+hs1988 if length(hs1988)==5
keep hs1988 hs96
sort hs96
save $datapath/temp, replace
clear
insheet using $datapathother/HS96_HS02.txt, names
tostring hs02, replace
replace hs02="0"+hs02 if length(hs02)==5
tostring hs96, replace
replace hs96="0"+hs96 if length(hs96)==5
keep hs02 hs96
sort hs96
merge hs96 using temp
drop _merge
sort hs1988 hs02
duplicates drop hs1988 hs02, force
duplicates tag hs1988, g(_)
tab _
keep hs02 hs1988
sort hs02
save $datapath/temp, replace
clear
insheet using $datapathother/hs2002ToIsicrev3.csv, names delimiter(";")
tostring productcode, g(hs02)
replace hs02="0"+hs02 if length(hs02)==5
tostring isicrevision3productcode, g(isic)
replace isic="0"+isic if length(isic)==3
g _=substr(isic,1,4)
g isicrev3=substr(isic,1,2)
replace isicrev3=_ if _=="2423"
keep hs02 isicrev3
merge 1:m hs02 using $datapath/temp
drop _merge
sort hs1988 isicrev3
by hs1988 isicrev3: keep if _n==1
duplicates tag hs1988, g(_)
tab _
* 36 hs1988 sectors have ambiguous isic correspondance
rename hs1988 hs88
sort hs88 isicrev3
by hs88: keep if _n==1
drop _
keep hs88 isicrev3
sort hs88
save $datapath/temp_hs88toisic, replace



cd $datapath
forvalues x=1990(1)2008{
	use export`x', clear
	sort siren pays year oblig
	collapse (sum) export  , by (siren pays year)
	save temp`x', replace
	use export`x', clear
	g hs4=substr(nc8,1,4)
	sort siren pays year hs4 oblig
	collapse (sum) export , by (siren pays year hs4)
	save temphs4`x', replace
	use export`x', clear
	g hs2=substr(nc8,1,2)
	sort siren pays year hs2 oblig
	collapse (sum) export  , by (siren pays year hs2)
	save temphs2`x', replace
	use export`x', clear
	g hs2=substr(nc8,1,2)
	destring hs2, replace
	g hssec=""
	replace hssec="I" if hs2<6
	replace hssec="II" if hs2>=6 & hs2<14
	replace hssec="III" if hs2==15
	replace hssec="IV" if hs2>=16 & hs2<25
	replace hssec="V" if hs2>=25 & hs2<28
	replace hssec="VI" if hs2>=28 & hs2<39
	replace hssec="VII" if hs2>=39 & hs2<41
	replace hssec="VIII" if hs2>=41 & hs2<44
	replace hssec="IX" if hs2>=44 & hs2<47
	replace hssec="X" if hs2>=47 & hs2<50
	replace hssec="XI" if hs2>=50 & hs2<64
	replace hssec="XII" if hs2>=64 & hs2<68
	replace hssec="XIII" if hs2>=68 & hs2<71
	replace hssec="XIV" if hs2==71
	replace hssec="XV" if hs2>=72 & hs2<84
	replace hssec="XVI" if hs2>=84 & hs2<86
	replace hssec="XVII" if hs2>=86 & hs2<90
	replace hssec="XVIII" if hs2>=90 & hs2<93
	replace hssec="XIX" if hs2==93
	replace hssec="XX" if hs2>=94 & hs2<97
	replace hssec="XXI" if hs2>=97 & hs2<99
	sort siren pays year hssec oblig
	collapse (sum) export  , by (siren pays year hssec)
	save temphssec`x', replace
}
forvalues x=1990(1)1995{
	use export`x', clear
	g hs6=substr(nc8,1,6)
	rename hs6 hs88
	sort hs88
	merge hs88 using temp_hs88toisic
	drop if _merge==2
	drop _merge
	rename hs88 hs6
	sort siren pays year isicrev3
	collapse (sum) export , by (siren pays year isicrev3)
	drop if isicrev3==""
	save tempisicrev3`x', replace
	erase export`x'.dta
}
forvalues x=1996(1)2001{
	use export`x', clear
	g hs6=substr(nc8,1,6)
	rename hs6 hs96
	sort hs96
	merge hs96 using temp_hs96toisic
	drop if _merge==2
	drop _merge
	rename hs96 hs6
	sort siren pays year isicrev3
	collapse (sum) export  , by (siren pays year isicrev3)
	drop if isicrev3==""
	save tempisicrev3`x', replace
	erase export`x'.dta
}
forvalues x=2002(1)2006{
	use export`x', clear
	g hs6=substr(nc8,1,6)
	rename hs6 hs02
	sort hs02
	merge hs02 using temp_hs02toisic
	drop if _merge==2
	drop _merge
	rename hs02 hs6
	sort siren pays year isicrev3
	collapse (sum) export , by (siren pays year isicrev3)
	drop if isicrev3==""
	save tempisicrev3`x', replace
	erase export`x'.dta
}
forvalues x=2007(1)2008{
	use export`x', clear
	g hs6=substr(nc8,1,6)
	rename hs6 hs07
	sort hs07
	merge hs07 using temp_hs07toisic
	drop if _merge==2
	drop _merge
	rename hs07 hs6
	sort siren pays year isicrev3
	collapse (sum) export  , by (siren pays year isicrev3)
	drop if isicrev3==""
	save tempisicrev3`x', replace
	erase export`x'.dta
}

* Append year tables
use temp1990, clear
forvalues x=1991(1)2008{
append using temp`x'
erase temp`x'.dta
}
drop if pays=="FR"
sort siren year pays
save export90-08, replace
use temphs21990, clear
forvalues x=1991(1)2008{
append using temphs2`x'
erase temphs2`x'.dta
}
drop if pays=="FR"
sort siren year pays hs2
save exporths2_90-08, replace
use temphs41990, clear
forvalues x=1991(1)2008{
append using temphs4`x'
erase temphs4`x'.dta
}
drop if pays=="FR"
sort siren year pays hs4
save exporths4_90-08, replace
use tempisicrev31990, clear
forvalues x=1991(1)2008{
append using tempisicrev3`x'
erase tempisicrev3`x'.dta
}
drop if pays=="FR"
sort siren year pays isicrev3
save exportisicrev3_90-08, replace
use temphssec1990, clear
forvalues x=1991(1)2008{
append using temphssec`x'
erase temphssec`x'.dta
}
drop if pays=="FR"
sort siren year pays hssec
save exporthssec_90-08, replace
use exportisicrev3_90-08, clear
g wiotsec=""
replace wiotsec="A01" if isicrev3=="01"
replace wiotsec="A02" if isicrev3=="02"	
replace wiotsec="A03" if isicrev3=="05" 
replace wiotsec="B" if isicrev3=="10"|isicrev3=="11"|isicrev3=="12"|isicrev3=="13"|isicrev3=="14"
replace wiotsec="C10-C12" if isicrev3=="15"|isicrev3=="16"
replace wiotsec="C13-C15" if isicrev3=="17"|isicrev3=="18"|isicrev3=="19"
replace wiotsec="C16" if isicrev3=="20"
replace wiotsec="C17" if isicrev3=="21"
replace wiotsec="C18" if isicrev3=="22"
replace wiotsec="C19" if isicrev3=="23"
replace wiotsec="C20" if isicrev3=="24"
replace wiotsec="C21" if isicrev3=="2423"
replace wiotsec="C22" if isicrev3=="25"
replace wiotsec="C23" if isicrev3=="26"
replace wiotsec="C24" if isicrev3=="27"
replace wiotsec="C25" if isicrev3=="28"
replace wiotsec="C26" if isicrev3=="30"|isicrev3=="33"
replace wiotsec="C27" if isicrev3=="31"|isicrev3=="32"
replace wiotsec="C28" if isicrev3=="29"
replace wiotsec="C29" if isicrev3=="34"
replace wiotsec="C30" if isicrev3=="35"
replace wiotsec="C31_C32" if isicrev3=="36"
sort siren pays year wiotsec
collapse (sum) export , by (siren pays year wiotsec)
drop if wiotsec==""
save exportwiotsec_90-08, replace

use exportisicrev3_90-08, clear
g isic3code_2dig=substr(isicrev3,1,2)
ge wiot12code =""
replace wiot12code = "AtB" if isic3code_2dig == "01" | isic3code_2dig == "02" | isic3code_2dig == "05"
replace wiot12code = "C" if isic3code_2dig == "10" |  isic3code_2dig == "11"| isic3code_2dig == "12"| isic3code_2dig == "13" | isic3code_2dig == "14"
replace wiot12code = "15t16" if isic3code_2dig == "15" |  isic3code_2dig == "16"
replace wiot12code = "17t18" if isic3code_2dig == "17" |  isic3code_2dig == "18"
replace wiot12code = "19" if isic3code_2dig == "19"
replace wiot12code = "20" if isic3code_2dig == "20"
replace wiot12code = "21t22" if isic3code_2dig == "21" |  isic3code_2dig == "22"
replace wiot12code = "23" if isic3code_2dig == "23"
replace wiot12code = "24" if isic3code_2dig == "24"
replace wiot12code = "25" if isic3code_2dig == "25"
replace wiot12code = "26" if isic3code_2dig == "26"
replace wiot12code = "27t28" if isic3code_2dig == "27" |  isic3code_2dig == "28"
replace wiot12code = "29" if isic3code_2dig == "29"
replace wiot12code = "30t33" if isic3code_2dig == "30" |  isic3code_2dig == "31"| isic3code_2dig == "32"| isic3code_2dig == "33"
replace wiot12code = "34t35" if isic3code_2dig == "34" |  isic3code_2dig == "35"
replace wiot12code = "36t37" if isic3code_2dig == "36" |  isic3code_2dig == "37"
replace wiot12code = "E" if isic3code_2dig == "40" |  isic3code_2dig == "41"
replace wiot12code = "F" if isic3code_2dig == "45"
replace wiot12code = "50" if isic3code_2dig == "50"
replace wiot12code = "51" if isic3code_2dig == "51"
replace wiot12code = "52" if isic3code_2dig == "52"
replace wiot12code = "H" if isic3code_2dig == "55"
replace wiot12code = "60" if isic3code_2dig == "60"
replace wiot12code = "61" if isic3code_2dig == "61"
replace wiot12code = "62" if isic3code_2dig == "62"
replace wiot12code = "63" if isic3code_2dig == "63"
replace wiot12code = "64" if isic3code_2dig == "64"
replace wiot12code = "J" if isic3code_2dig == "67" |  isic3code_2dig == "66"| isic3code_2dig == "65"
replace wiot12code = "70" if isic3code_2dig == "70"
replace wiot12code = "71t74" if isic3code_2dig == "71" |  isic3code_2dig == "72"| isic3code_2dig == "73"| isic3code_2dig == "74"
replace wiot12code = "L" if isic3code_2dig == "75"
replace wiot12code = "M" if isic3code_2dig == "80"
replace wiot12code = "N" if isic3code_2dig == "85"
replace wiot12code = "O" if isic3code_2dig == "90" |  isic3code_2dig == "91"| isic3code_2dig == "92"| isic3code_2dig == "93"
replace wiot12code = "P" if isic3code_2dig == "95"
sort siren pays year wiot12code
collapse (sum) export , by (siren pays year wiot12code)
drop if wiot12code==""
save exportwiot12_90-08, replace

erase temp1990.dta
erase temphs41990.dta
erase temphs21990.dta
erase temphssec1990.dta
erase tempisicrev31990.dta
